#include "mupdf/fitz.h"
#include "mupdf/pdf.h"

static void
pdf_clean_stream_object(fz_context *ctx, pdf_document *doc, pdf_obj *obj, pdf_obj *orig_res, fz_cookie *cookie, int own_res,
		pdf_text_filter_fn *text_filter, pdf_after_text_object_fn *after_text, void *arg,
		int sanitize, int ascii)
{
	pdf_processor *proc_buffer = NULL;
	pdf_processor *proc_filter = NULL;
	pdf_obj *res = NULL;
	pdf_obj *ref;
	fz_buffer *buffer;

	if (!obj)
		return;

	fz_var(res);
	fz_var(proc_buffer);
	fz_var(proc_filter);

	buffer = fz_new_buffer(ctx, 1024);

	fz_try(ctx)
	{
		if (own_res)
		{
			pdf_obj *r = pdf_dict_get(ctx, obj, PDF_NAME(Resources));
			if (r)
				orig_res = r;
		}

		res = pdf_new_dict(ctx, doc, 1);

		proc_buffer = pdf_new_buffer_processor(ctx, buffer, ascii);
		proc_filter = pdf_new_filter_processor_with_text_filter(ctx, doc, proc_buffer, orig_res, res, text_filter, after_text, arg);

		pdf_process_contents(ctx, proc_filter, doc, orig_res, obj, cookie);
		pdf_close_processor(ctx, proc_filter);
		pdf_close_processor(ctx, proc_buffer);

		pdf_update_stream(ctx, doc, obj, buffer, 0);

		if (own_res)
		{
			ref = pdf_add_object(ctx, doc, res);
			pdf_dict_put_drop(ctx, obj, PDF_NAME(Resources), ref);
		}
	}
	fz_always(ctx)
	{
		pdf_drop_processor(ctx, proc_filter);
		pdf_drop_processor(ctx, proc_buffer);
		fz_drop_buffer(ctx, buffer);
		pdf_drop_obj(ctx, res);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}

static void
pdf_clean_type3(fz_context *ctx, pdf_document *doc, pdf_obj *obj, pdf_obj *orig_res, fz_cookie *cookie, int sanitize, int ascii)
{
	pdf_processor *proc_buffer = NULL;
	pdf_processor *proc_filter = NULL;
	pdf_obj *res = NULL;
	pdf_obj *ref;
	pdf_obj *charprocs;
	int i, l;

	fz_var(res);
	fz_var(proc_buffer);
	fz_var(proc_filter);

	fz_try(ctx)
	{
		res = pdf_dict_get(ctx, obj, PDF_NAME(Resources));
		if (res)
			orig_res = res;
		res = NULL;

		res = pdf_new_dict(ctx, doc, 1);

		charprocs = pdf_dict_get(ctx, obj, PDF_NAME(CharProcs));
		l = pdf_dict_len(ctx, charprocs);

		for (i = 0; i < l; i++)
		{
			pdf_obj *val = pdf_dict_get_val(ctx, charprocs, i);
			fz_buffer *buffer = fz_new_buffer(ctx, 1024);
			fz_try(ctx)
			{
				proc_buffer = pdf_new_buffer_processor(ctx, buffer, ascii);
				if (sanitize)
				{
					proc_filter = pdf_new_filter_processor(ctx, doc, proc_buffer, orig_res, res);
					pdf_process_contents(ctx, proc_filter, doc, orig_res, val, cookie);
					pdf_close_processor(ctx, proc_filter);
				}
				else
				{
					pdf_process_contents(ctx, proc_filter, doc, orig_res, val, cookie);
				}
				pdf_close_processor(ctx, proc_buffer);

				pdf_update_stream(ctx, doc, val, buffer, 0);
			}
			fz_always(ctx)
			{
				pdf_drop_processor(ctx, proc_filter);
				pdf_drop_processor(ctx, proc_buffer);
				fz_drop_buffer(ctx, buffer);
			}
			fz_catch(ctx)
			{
				fz_rethrow(ctx);
			}
		}

		/* ProcSet - no cleaning possible. Inherit this from the old dict. */
		pdf_dict_put(ctx, res, PDF_NAME(ProcSet), pdf_dict_get(ctx, orig_res, PDF_NAME(ProcSet)));

		ref = pdf_add_object(ctx, doc, res);
		pdf_dict_put_drop(ctx, obj, PDF_NAME(Resources), ref);
	}
	fz_always(ctx)
	{
		pdf_drop_obj(ctx, res);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}

/*
	Clean a loaded pages rendering operations,
	with an optional post processing step.

	Firstly, this filters the PDF operators used to avoid (some cases
	of) repetition, and leaves the page in a balanced state with an
	unchanged top level matrix etc. At the same time, the resources
	used by the page contents are collected.

	Next, the resources themselves are cleaned (as appropriate) in the
	same way.

	Next, an optional post processing stage is called.

	Finally, the page contents and resources in the documents page tree
	are replaced by these processed versions.

	Annotations remain unaffected.

	page: A page loaded by pdf_load_page.

	cookie: A pointer to an optional fz_cookie structure that can be used
	to track progress, collect errors etc.
*/
void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_cookie *cookie, pdf_page_contents_process_fn *proc_fn, void *arg, int sanitize, int ascii)
{
	pdf_filter_page_contents(ctx, doc, page, cookie, proc_fn, NULL, NULL, arg, sanitize, ascii);
}

/*
	Performs the same task as
	pdf_clean_page_contents, but with an optional text filter
	function.

	text_filter: Function to assess whether a given character
	should be kept (return 0) or removed (return 1).

	after_text: Function called after each text object is closed
	to allow other output to be sent.

	arg: Opaque value to be passed to callback functions.
*/
void pdf_filter_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_cookie *cookie,
		pdf_page_contents_process_fn *proc_fn, pdf_text_filter_fn *text_filter, pdf_after_text_object_fn *after_text, void *proc_arg,
		int sanitize, int ascii)
{
	pdf_processor *proc_buffer = NULL;
	pdf_processor *proc_filter = NULL;
	pdf_obj *new_obj = NULL;
	pdf_obj *new_ref = NULL;
	pdf_obj *res = NULL;
	pdf_obj *obj;
	pdf_obj *contents;
	pdf_obj *resources;
	fz_buffer *buffer;

	fz_var(new_obj);
	fz_var(new_ref);
	fz_var(res);
	fz_var(proc_buffer);
	fz_var(proc_filter);

	buffer = fz_new_buffer(ctx, 1024);

	fz_try(ctx)
	{
		contents = pdf_page_contents(ctx, page);
		resources = pdf_page_resources(ctx, page);

		proc_buffer = pdf_new_buffer_processor(ctx, buffer, ascii);
		if (sanitize)
		{
			res = pdf_new_dict(ctx, doc, 1);
			proc_filter = pdf_new_filter_processor_with_text_filter(ctx, doc, proc_buffer, resources, res, text_filter, after_text, proc_arg);
			pdf_process_contents(ctx, proc_filter, doc, resources, contents, cookie);
			pdf_close_processor(ctx, proc_filter);
		}
		else
		{
			res = pdf_keep_obj(ctx, resources);
			pdf_process_contents(ctx, proc_buffer, doc, resources, contents, cookie);
		}
		pdf_close_processor(ctx, proc_buffer);

		/* Deal with page content stream. */

		if (pdf_is_array(ctx, contents))
		{
			/* create a new object to replace the array */
			new_obj = pdf_new_dict(ctx, doc, 1);
			new_ref = pdf_add_object(ctx, doc, new_obj);
			contents = new_ref;
			pdf_dict_put(ctx, page->obj, PDF_NAME(Contents), contents);
		}
		else
		{
			pdf_dict_del(ctx, contents, PDF_NAME(Filter));
			pdf_dict_del(ctx, contents, PDF_NAME(DecodeParms));
		}

		pdf_update_stream(ctx, doc, contents, buffer, 0);

		/* Now deal with resources. The spec allows for Type3 fonts and form
		 * XObjects to omit a resource dictionary and look in the parent.
		 * Avoid that by flattening here as part of the cleaning. This could
		 * conceivably cause changes in rendering, but we don't care. */

		/* ExtGState */
		obj = pdf_dict_get(ctx, res, PDF_NAME(ExtGState));
		if (obj)
		{
			int i, l;

			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *o = pdf_dict_get(ctx, pdf_dict_get_val(ctx, obj, i), PDF_NAME(SMask));
				if (!o)
					continue;
				o = pdf_dict_get(ctx, o, PDF_NAME(G));
				if (!o)
					continue;
				/* Transparency group XObject */
				pdf_clean_stream_object(ctx, doc, o, resources, cookie, 1, text_filter, after_text, proc_arg, sanitize, ascii);
			}
		}

		/* Pattern */
		obj = pdf_dict_get(ctx, res, PDF_NAME(Pattern));
		if (obj)
		{
			int i, l;
			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *pat_res;
				pdf_obj *pat = pdf_dict_get_val(ctx, obj, i);
				if (!pat)
					continue;
				pat_res = pdf_dict_get(ctx, pat, PDF_NAME(Resources));
				if (pat_res == NULL)
					pat_res = resources;
				if (pdf_dict_get_int(ctx, pat, PDF_NAME(PatternType)) == 1)
					pdf_clean_stream_object(ctx, doc, pat, pat_res, cookie, 0, text_filter, after_text, proc_arg, sanitize, ascii);
			}
		}

		/* XObject */
		obj = pdf_dict_get(ctx, res, PDF_NAME(XObject));
		if (obj)
		{
			int i, l;
			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *xobj_res;
				pdf_obj *xobj = pdf_dict_get_val(ctx, obj, i);
				if (!xobj)
					continue;
				xobj_res = pdf_dict_get(ctx, xobj, PDF_NAME(Resources));
				if (xobj_res == NULL)
					xobj_res = resources;
				if (pdf_name_eq(ctx, PDF_NAME(Form), pdf_dict_get(ctx, xobj, PDF_NAME(Subtype))))
					pdf_clean_stream_object(ctx, doc, xobj, xobj_res, cookie, 1, text_filter, after_text, proc_arg, sanitize, ascii);
			}
		}

		/* Font */
		obj = pdf_dict_get(ctx, res, PDF_NAME(Font));
		if (obj)
		{
			int i, l;
			l = pdf_dict_len(ctx, obj);
			for (i = 0; i < l; i++)
			{
				pdf_obj *o = pdf_dict_get_val(ctx, obj, i);
				if (!o)
					continue;
				if (pdf_name_eq(ctx, PDF_NAME(Type3), pdf_dict_get(ctx, o, PDF_NAME(Subtype))))
					pdf_clean_type3(ctx, doc, o, resources, cookie, sanitize, ascii);
			}
		}

		/* ProcSet - no cleaning possible. Inherit this from the old dict. */
		obj = pdf_dict_get(ctx, resources, PDF_NAME(ProcSet));
		if (obj)
			pdf_dict_put(ctx, res, PDF_NAME(ProcSet), obj);

		/* ColorSpace - no cleaning possible. */
		/* Properties - no cleaning possible. */

		if (proc_fn)
			(*proc_fn)(ctx, buffer, res, proc_arg);

		/* Update resource dictionary */
		if (sanitize)
		{
			pdf_dict_put(ctx, page->obj, PDF_NAME(Resources), res);
		}
	}
	fz_always(ctx)
	{
		pdf_drop_processor(ctx, proc_filter);
		pdf_drop_processor(ctx, proc_buffer);
		fz_drop_buffer(ctx, buffer);
		pdf_drop_obj(ctx, new_obj);
		pdf_drop_obj(ctx, new_ref);
		pdf_drop_obj(ctx, res);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}

/*
	Clean a loaded annotations rendering operations,
	with an optional post processing step.

	Each appearance stream in the annotation is processed.

	Firstly, this filters the PDF operators used to avoid (some cases
	of) repetition, and leaves the page in a balanced state with an
	unchanged top level matrix etc. At the same time, the resources
	used by the page contents are collected.

	Next, the resources themselves are cleaned (as appropriate) in the
	same way.

	Next, an optional post processing stage is called.

	Finally, the updated stream of operations is reinserted into the
	appearance stream.

	annot: An annotation loaded by pdf_load_annot.

	cookie: A pointer to an optional fz_cookie structure that can be used
	to track progress, collect errors etc.
*/
void pdf_clean_annot_contents(fz_context *ctx, pdf_document *doc, pdf_annot *annot, fz_cookie *cookie, pdf_page_contents_process_fn *proc_fn, void *proc_arg, int sanitize, int ascii)
{
	pdf_filter_annot_contents(ctx, doc, annot, cookie, proc_fn, NULL, NULL, proc_arg, sanitize, ascii);
}

/*
	Performs the same task as
	pdf_clean_annot_contents, but with an optional text filter
	function.

	text_filter: Function to assess whether a given character
	should be kept (return 0) or removed (return 1).

	after_text: Function called after each text object is closed
	to allow other output to be sent.

	arg: Opaque value to be passed to callback functions.
*/
void pdf_filter_annot_contents(fz_context *ctx, pdf_document *doc, pdf_annot *annot, fz_cookie *cookie,
	pdf_page_contents_process_fn *proc, pdf_text_filter_fn *text_filter, pdf_after_text_object_fn *after_text, void *arg, int sanitize, int ascii)
{
	pdf_obj *ap;
	int i, n;

	ap = pdf_dict_get(ctx, annot->obj, PDF_NAME(AP));
	if (ap == NULL)
		return;

	n = pdf_dict_len(ctx, ap);
	for (i = 0; i < n; i++)
	{
		pdf_obj *v = pdf_dict_get_val(ctx, ap, i);

		if (v == NULL)
			continue;

		pdf_clean_stream_object(ctx, doc, v, NULL, cookie, 1, text_filter, after_text, arg, sanitize, ascii);
	}
}

static void
pdf_redact_end_page(fz_context *ctx, fz_buffer *buf, pdf_obj *res, void *opaque)
{
	pdf_page *page = opaque;
	pdf_annot *annot;
	pdf_obj *qp;
	int i, n;

	fz_append_string(ctx, buf, "0 g\n");

	for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot))
	{
		if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact))
		{
			qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints));
			n = pdf_array_len(ctx, qp);
			if (n > 0)
			{
				for (i = 0; i < n; i += 8)
				{
					fz_quad q = pdf_to_quad(ctx, qp, i);
					fz_append_printf(ctx, buf, "%g %g m\n", q.ll.x, q.ll.y);
					fz_append_printf(ctx, buf, "%g %g l\n", q.lr.x, q.lr.y);
					fz_append_printf(ctx, buf, "%g %g l\n", q.ur.x, q.ur.y);
					fz_append_printf(ctx, buf, "%g %g l\n", q.ul.x, q.ul.y);
					fz_append_string(ctx, buf, "f\n");
				}
			}
			else
			{
				fz_rect r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect));
				fz_append_printf(ctx, buf, "%g %g m\n", r.x0, r.y0);
				fz_append_printf(ctx, buf, "%g %g l\n", r.x1, r.y0);
				fz_append_printf(ctx, buf, "%g %g l\n", r.x1, r.y1);
				fz_append_printf(ctx, buf, "%g %g l\n", r.x0, r.y1);
				fz_append_string(ctx, buf, "f\n");
			}
		}
	}
}

static int
pdf_redact_text_filter(fz_context *ctx, void *opaque, int *ucsbuf, int ucslen, fz_matrix trm, fz_matrix ctm, fz_rect bbox)
{
	pdf_page *page = opaque;
	pdf_annot *annot;
	pdf_obj *qp;
	fz_rect r;
	fz_quad q;
	int i, n;

	trm = fz_concat(trm, ctm);

	for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot))
	{
		if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact))
		{
			qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints));
			n = pdf_array_len(ctx, qp);
			if (n > 0)
			{
				for (i = 0; i < n; i += 8)
				{
					q = pdf_to_quad(ctx, qp, i);
					if (fz_is_point_inside_quad(fz_make_point(trm.e, trm.f), q))
						return 1;
				}
			}
			else
			{
				r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect));
				if (fz_is_point_inside_rect(fz_make_point(trm.e, trm.f), r))
					return 1;
			}
		}
	}

	return 0;
}

int
pdf_redact_page(fz_context *ctx, pdf_document *doc, pdf_page *page, pdf_redact_options *opts)
{
	pdf_annot *annot;
	int has_redactions = 0;
	int no_black_boxes = 0;

	if (opts)
	{
		no_black_boxes = opts->no_black_boxes;
	}

	for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot))
		if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact))
			has_redactions = 1;

	if (has_redactions)
	{
		pdf_filter_page_contents(ctx, doc, page, NULL,
			no_black_boxes ? NULL : pdf_redact_end_page,
			pdf_redact_text_filter,
			NULL,
			page,
			1, 1);
	}

	annot = pdf_first_annot(ctx, page);
	while (annot)
	{
		if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact))
		{
			pdf_delete_annot(ctx, page, annot);
			annot = pdf_first_annot(ctx, page);
		}
		else
		{
			annot = pdf_next_annot(ctx, annot);
		}
	}

	doc->redacted = has_redactions;

	return has_redactions;
}
